import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
# Load the 2019-nCoV case records; every later step works on this frame.
data = pd.read_csv("2019_nCoV_data.csv")

# First look at the raw table: sample rows, column dtypes, and summary
# statistics (default columns first, then the object-typed columns).
data.head()
data.dtypes
data.describe()
data.describe(include="O")

# Column overview, followed by the number of missing values per column.
data.info()
data.isnull().sum()

# Distinct country labels present in the data.
data.Country.unique()
# Excluding rows whose country name is 'Others'
# Drop the catch-all 'Others' rows. Take an explicit copy so the column
# assignments below write to an independent frame instead of to a slice of
# the original (the pattern pandas flags with SettingWithCopyWarning).
# The original row labels are kept on purpose; nothing is re-indexed here.
data = data[data.Country != 'Others'].copy()
data.head()

# Parse both timestamp columns so they can be compared, grouped and plotted
# as real datetimes rather than strings.
data["Date"] = pd.to_datetime(data["Date"])
data['Last Update'] = pd.to_datetime(data['Last Update'])
data['Last Update'].head()
# Checking the date-wise trend of confirmed patients and number of deaths
# Date-wise sums. The metric column is selected *before* aggregating: calling
# .sum() on the whole grouped frame also tries to add up the text and datetime
# columns, which raises TypeError on pandas >= 2.0 (and wastes work on older
# versions).
# Series.plot draws on the current axes, so each single-metric curve gets its
# own figure instead of being painted over the previous one.
plt.figure()
data.groupby('Date')['Confirmed'].sum().plot(color='y', linestyle='--', linewidth=3)
plt.grid(True)  # explicit True: a bare plt.grid() toggles the grid state

plt.figure()
data.groupby('Date')['Deaths'].sum().plot(color='r', linestyle='--', linewidth=3)
plt.grid(True)

plt.figure()
data.groupby('Date')['Recovered'].sum().plot(color='g', linestyle='--', linewidth=3)
plt.grid(True)

# All three metrics on one chart (DataFrame.plot opens a new figure itself).
data.groupby('Date')[['Recovered', 'Deaths', 'Confirmed']].sum().plot(linestyle='--', linewidth=3)
plt.grid(True)

# The same three metrics summed per country.
data.groupby('Country')[['Recovered', 'Deaths', 'Confirmed']].sum().plot(linestyle='--', linewidth=3)
plt.grid(True)
# Country-wise distribution
# Number of records per country. Naming the index and the counts explicitly
# yields the columns ['Country', 'Count'] on every pandas version. Renaming the
# reset_index() output by its default labels ('index' / 'Country') breaks on
# pandas >= 2.0, where those defaults became 'Country' / 'count'.
country_count = (
    data['Country']
    .value_counts()
    .rename_axis('Country')
    .reset_index(name='Count')
)
country_count.head()

# One figure size for every bar chart in this section.
sns.set(rc={'figure.figsize': (11.7, 8.27)})

# seaborn draws on the current axes, so open a fresh figure per chart to keep
# the charts from being layered on top of each other (or on an earlier plot).
plt.figure()
sns.barplot(x='Count', y='Country', data=country_count)

# Highest 'Confirmed' value recorded for each country.
# NOTE: this rebinds country_count; from here on it holds the columns
# 'Country' and 'Confirmed', not 'Count'.
country_count = pd.DataFrame(data.groupby('Country')['Confirmed'].max()).reset_index()
country_count.head()
plt.figure()
sns.barplot(x='Confirmed', y='Country', data=country_count)

# Same chart without Mainland China, so the remaining countries are readable.
country_count_wo_mc = country_count[country_count.Country != 'Mainland China']
country_count_wo_mc.head()
plt.figure()
sns.barplot(x='Confirmed', y='Country', data=country_count_wo_mc)
# Requires plotly. Install it from a shell, not from inside Python:
#     conda install -c plotly plotly
import plotly.graph_objects as go

# Share of confirmed cases per country, leaving out Mainland China.
fig = go.Figure(data=[
    go.Pie(labels=country_count_wo_mc.Country, values=country_count_wo_mc.Confirmed),
])
fig.update_layout(title="Country-wise distribution except Mainland China")
fig.show()

# Highest 'Confirmed' value per province/state of Mainland China. The column
# is selected before aggregating so max() is not evaluated over the unrelated
# text and datetime columns.
state_count = (
    data[data['Country'] == 'Mainland China']
    .groupby('Province/State')['Confirmed']
    .max()
    .reset_index()
)
fig = go.Figure(data=[
    go.Pie(labels=state_count['Province/State'], values=state_count.Confirmed),
])
fig.update_layout(title="State-wise distribution of Mainland China")
fig.show()
# Growth trend of the coronavirus in each country individually, except Mainland China
# Confirmed cases per country over time, Mainland China excluded.
# pivot_table averages rows that share the same (Date, Country) cell by
# default; aggfunc='sum' totals them instead, matching the per-country sums
# used elsewhere in this file.
# NOTE(review): assumes a country may appear as several Province/State rows on
# one date - confirm against the data.
growth_country = pd.pivot_table(
    data[data.Country != 'Mainland China'],
    values='Confirmed',
    columns='Country',
    index='Date',
    aggfunc='sum',
)
growth_country.plot(figsize=(18, 12))
# Growth trend of the coronavirus in each province/state of Mainland China individually
# One column per Mainland China province/state, one row per date, holding the
# (default-aggregated) 'Confirmed' value; plotted as one line per province.
growth_state = data.loc[data['Country'] == 'Mainland China'].pivot_table(
    index='Date',
    columns='Province/State',
    values='Confirmed',
)
growth_state.plot(figsize=(18, 12))
# Countries affected on the first day, and the number of confirmed cases on that date in each country.
# Rows belonging to the first reported day. The earliest date is taken
# explicitly: data['Date'][0] is a *label* lookup, and 'data' is a filtered
# frame that keeps its original row labels, so label 0 is not guaranteed to
# exist or to be the first day.
first_day_rows = data[data['Date'] == data['Date'].min()]

# Countries that already reported cases on that day.
initial_country = first_day_rows['Country'].unique()
initial_country

# Confirmed cases per country on that day. 'Confirmed' is selected before
# summing so the text/datetime columns are not aggregated (TypeError on
# pandas >= 2.0).
first_day_rows.groupby('Country')['Confirmed'].sum()
# Total confirmed, recovered and death counts in each country
# Per-country maximum of each metric, Mainland China excluded.
# The columns are selected with a list: selecting them with a bare tuple
# (['Confirmed','Deaths','Recovered'] without the inner brackets) is rejected
# by pandas >= 2.0.
# NOTE(review): max() is taken over individual rows; if a country is split
# into several Province/State rows this is the largest row, not the country
# total - confirm that is intended.
global_case_wo_mc = (
    data[data.Country != 'Mainland China']
    .groupby('Country')[['Confirmed', 'Deaths', 'Recovered']]
    .max()
    .reset_index()
)
global_case_wo_mc

# Grouped bar chart: one bar per metric for every country. The original built
# and showed this identical figure twice (copy-paste); it is shown once here.
fig = go.Figure(data=[
    go.Bar(name=metric, x=global_case_wo_mc['Country'], y=global_case_wo_mc[metric])
    for metric in ('Confirmed', 'Deaths', 'Recovered')
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()
# Total confirmed, recovered and death counts in each province/state of Mainland China
# Per-province sums of each metric for Mainland China.
# The columns are selected with a list; tuple-style selection on a groupby is
# rejected by pandas >= 2.0.
# NOTE(review): this sums every dated row per province, whereas the other
# country/province tables in this file use max(). If the counts are
# cumulative, sum() overstates the totals - confirm.
mainland_case = (
    data[data['Country'] == 'Mainland China']
    .groupby('Province/State')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
mainland_case.head()

# seaborn draws on the current axes; open a fresh figure so the bars are not
# drawn over an earlier matplotlib chart.
plt.figure()
sns.barplot(x='Confirmed', y='Province/State', data=mainland_case)

# Grouped bar chart: one bar per metric for every province/state.
fig = go.Figure(data=[
    go.Bar(name=metric, x=mainland_case['Province/State'], y=mainland_case[metric])
    for metric in ('Confirmed', 'Deaths', 'Recovered')
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()
# Recovery and death trends globally
# Recovered and death counts per country over time.
# pivot_table averages rows sharing a (Date, Country) cell by default, which
# would average Mainland China's Province/State rows; aggfunc='sum' gives the
# per-country total for each date instead.
recovery_country = pd.pivot_table(
    data, values='Recovered', columns='Country', index='Date', aggfunc='sum',
)
recovery_country.plot(figsize=(18, 12))

death_country = pd.pivot_table(
    data, values='Deaths', columns='Country', index='Date', aggfunc='sum',
)
death_country.plot(figsize=(18, 12))

# Per-country maximum of each metric, all countries included. List selection
# is required: tuple-style column selection on a groupby fails on
# pandas >= 2.0.
global_case = data.groupby('Country')[['Confirmed', 'Deaths', 'Recovered']].max().reset_index()
global_case
# Country coordinates used to place the map markers further down.
world_coordinates = pd.read_csv("world_coordinates.csv")

# Sum of 'Confirmed' for every country, as a frame indexed by country name.
cases = data.groupby('Country')[['Confirmed']].sum()

# Expose the country name as a regular column and renumber the rows 1..N,
# where N is the number of distinct countries.
Number_of_countries = data['Country'].nunique()
cases = cases.assign(Country=cases.index)
cases.index = np.arange(1, Number_of_countries + 1)

# Keep the two columns in (Country, Confirmed) order.
global_cases = cases.loc[:, ['Country', 'Confirmed']]
global_cases

# Attach the coordinates; only countries present in both frames are kept.
world_data = world_coordinates.merge(global_cases, on='Country')
world_data.head()
# folium is used below but was never imported in this file.
import folium

# create map and display it
# NOTE(review): newer folium releases may no longer bundle the 'Stamen Toner'
# tiles - confirm against the installed version.
world_map = folium.Map(location=[10, -20], zoom_start=2.3, tiles='Stamen Toner')

# One red circle per country, with a popup showing its name and case count.
for lat, lon, value, name in zip(world_data['latitude'], world_data['longitude'],
                                 world_data['Confirmed'], world_data['Country']):
    popup_html = ('<strong>Country</strong>: ' + str(name).capitalize() + '<br>'
                  '<strong>Confirmed Cases</strong>: ' + str(value) + '<br>')
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=popup_html,
        color='red',
        fill_color='red',
        fill_opacity=0.7,
    ).add_to(world_map)
world_map
# Mainland China
# Rows labelled 'China' and rows labelled 'Mainland China', stacked into one
# frame. DataFrame.append was removed in pandas 2.0 and, as written, its
# result was discarded; pd.concat is the replacement and the result is kept.
China1 = data[data['Country'] == 'China']
China2 = data[data['Country'] == 'Mainland China']
china_all = pd.concat([China1, China2])
china_all
import pycountry

# Lookup table: country name -> three-letter code (pycountry's alpha_3).
countries = {country.name: country.alpha_3 for country in pycountry.countries}

# Names with no entry in the table map to None (dict.get's default).
country_count["iso_alpha"] = country_count['Country'].map(countries.get)
# Some countries have no matching iso_alpha value, so the lookup returns None for them. Dropping those records.
# Keep only the rows without missing values (drops countries whose iso_alpha
# lookup failed).
filetered_country = country_count.dropna()
filetered_country.head()

import plotly.express as px

# NOTE(review): looks like a leftover from the plotly example; 'df' is not
# used by the chart below. Kept in case later code relies on it.
df = px.data.gapminder().query("year == 2007")

# World map coloured by confirmed cases. country_count was last rebuilt from
# the per-country 'Confirmed' maximum, so it has no 'Count' column any more;
# colouring by 'Count' fails, hence 'Confirmed'.
fig = px.choropleth(
    filetered_country,
    locations="iso_alpha",
    color='Confirmed',
    color_continuous_scale=px.colors.sequential.Plasma,
    projection="natural earth",
)
fig.show()